

library(readstata13)


######  Set Working Drive

setwd("/Users/aspearot/Dropbox/MaLT 2017/Draft/submission_draft/ReStat/resubmission 2/replication package/malt/")


######  Manyara 
 
######  Agrovet Trade Costs

x<-read.dta13("Data/Manyara Farmer_mktaccess.dta")

x<-subset(x,fert_use2017=="1. Yes")

##syntax here is trip_fert_type2017name_[trip number]_[agrovet number]_[n-th choice of fertilizer type]
##So, as an example, x$trip_fert_type2017name_1_2_1 is the type of fertilizer purchased during the first trip, at the second agrovet visited.  There is no x$trip_fert_type2017name_1_2_2 because there were no first trips in manyara where at the second agrovet visited a second type was purchased.
#In these next few lines, we determine whether fertilizer and seeds were purchased on each trip.

x$boughtfert1<-ifelse((x$trip_fert_type2017name_1_1_1!=""|x$trip_fert_type2017name_1_1_2!=""|x$trip_fert_type2017name_1_2_1!=""),1,0)
x$boughtfert2<-ifelse((x$trip_fert_type2017name_2_1_1!=""|x$trip_fert_type2017name_2_1_2!=""),1,0)
x$boughtfert3<-ifelse((x$trip_fert_type2017name_3_1_1!=""|x$trip_fert_type2017name_3_1_2!=""),1,0)
x$boughtfert4<-ifelse((x$trip_fert_type2017name_4_1_1!=""),1,0)

x$boughtseeds1<-ifelse((x$trip_seeds_2017name_1_1_1!=""|x$trip_seeds_2017name_1_1_2!=""|x$trip_seeds_2017name_1_1_3!=""|x$trip_seeds_2017name_1_1_4!=""|x$trip_seeds_2017name_1_2_1!=""),1,0)
x$boughtseeds2<-ifelse((x$trip_seeds_2017name_2_1_1!=""|x$trip_seeds_2017name_2_1_2!=""|x$trip_seeds_2017name_2_1_3!=""),1,0)
x$boughtseeds3<-ifelse((x$trip_seeds_2017name_3_1_1!=""|x$trip_seeds_2017name_3_1_2!=""|x$trip_seeds_2017name_3_1_3!=""),1,0)

#here we extract the village for each trip that included fertilizer
x$vil1<-ifelse(x$boughtfert1==1|x$boughtseeds1==1,x$trip_buy_village2017_1_1,"")
x$vil2<-ifelse(x$boughtfert2==1|x$boughtseeds2==1,x$trip_buy_village2017_2_1,"")
x$vil3<-ifelse(x$boughtfert3==1|x$boughtseeds3==1,x$trip_buy_village2017_3_1,"")
x$vil4<-ifelse(x$boughtfert4==1,x$trip_buy_village2017_4_1,"")

#here we extract the ward for each trip that included fertilizer
x$ward1<-ifelse(x$boughtfert1==1|x$boughtseeds1==1,x$trip_buy_ward2017_1_1,"")
x$ward2<-ifelse(x$boughtfert2==1|x$boughtseeds2==1,x$trip_buy_ward2017_2_1,"")
x$ward3<-ifelse(x$boughtfert3==1|x$boughtseeds3==1,x$trip_buy_ward2017_3_1,"")
x$ward4<-ifelse(x$boughtfert4==1,x$trip_buy_ward2017_4_1,"")

#here we extract the district for each trip that included fertilizer
x$district1<-ifelse(x$boughtfert1==1|x$boughtseeds1==1,x$trip_buy_district2017_1_1,"")
x$district2<-ifelse(x$boughtfert2==1|x$boughtseeds2==1,x$trip_buy_district2017_2_1,"")
x$district3<-ifelse(x$boughtfert3==1|x$boughtseeds3==1,x$trip_buy_district2017_3_1,"")
x$district4<-ifelse(x$boughtfert4==1,x$trip_buy_district2017_4_1,"")

#here we extract the village for each trip that included seeds
x$svil1<-ifelse(x$boughtseeds1==1,x$trip_buy_village2017_1_1,"")
x$svil2<-ifelse(x$boughtseeds2==1,x$trip_buy_village2017_2_1,"")
x$svil3<-ifelse(x$boughtseeds3==1,x$trip_buy_village2017_3_1,"")

#here we extract the ward for each trip that included seeds
x$sward1<-ifelse(x$boughtseeds1==1,x$trip_buy_ward2017_1_1,"")
x$sward2<-ifelse(x$boughtseeds2==1,x$trip_buy_ward2017_2_1,"")
x$sward3<-ifelse(x$boughtseeds3==1,x$trip_buy_ward2017_3_1,"")

#here we extract the district for each trip that included seeds
x$sdistrict1<-ifelse(x$boughtseeds1==1,x$trip_buy_district2017_1_1,"")
x$sdistrict2<-ifelse(x$boughtseeds2==1,x$trip_buy_district2017_2_1,"")
x$sdistrict3<-ifelse(x$boughtseeds3==1,x$trip_buy_district2017_3_1,"")

zero<-function(f){return(ifelse(is.na(f),0,f))}

#trip_fert_quant2017_[trip number]_[agrovet number]_[n-th choice of fertilizer type]_[n-th choice of fertilizer brand]_kg is the KG's purchased by trip, agrovet number within trip, fertilizer type, and brand.  The function zero() makes these all numeric values

x$trip_fert_quant2017_1_1_1_1_kg<-zero(x$trip_fert_quant2017_1_1_1_1_kg)
x$trip_fert_quant2017_1_1_2_1_kg<-zero(x$trip_fert_quant2017_1_1_2_1_kg) 
x$trip_fert_quant2017_1_2_1_1_kg<-zero(x$trip_fert_quant2017_1_2_1_1_kg) 
x$trip_fert_quant2017_2_1_1_1_kg<-zero(x$trip_fert_quant2017_2_1_1_1_kg) 
x$trip_fert_quant2017_2_1_2_1_kg<-zero(x$trip_fert_quant2017_2_1_2_1_kg) 
x$trip_fert_quant2017_3_1_1_1_kg<-zero(x$trip_fert_quant2017_3_1_1_1_kg) 
x$trip_fert_quant2017_3_1_2_1_kg<-zero(x$trip_fert_quant2017_3_1_2_1_kg) 
x$trip_fert_quant2017_4_1_1_1_kg<-zero(x$trip_fert_quant2017_4_1_1_1_kg) 

#sum quantity purchased per trip
x$q1<-ifelse(x$boughtfert1==1,x$trip_fert_quant2017_1_1_1_1_kg+x$trip_fert_quant2017_1_1_2_1_kg+x$trip_fert_quant2017_1_2_1_1_kg,0)
x$q2<-ifelse(x$boughtfert2==1,x$trip_fert_quant2017_2_1_1_1_kg+x$trip_fert_quant2017_2_1_2_1_kg,0)
x$q3<-ifelse(x$boughtfert3==1,x$trip_fert_quant2017_3_1_1_1_kg+x$trip_fert_quant2017_3_1_2_1_kg ,0)
x$q4<-ifelse(x$boughtfert4==1,x$trip_fert_quant2017_4_1_1_1_kg,0)

#similar for seeds, though since there were so many see brands, we did not collect information by seed brand
x$trip_seeds_quant2017_1_1_1_kg<-zero(x$trip_seeds_quant2017_1_1_1_kg)
x$trip_seeds_quant2017_1_1_2_kg<-zero(x$trip_seeds_quant2017_1_1_2_kg) 
x$trip_seeds_quant2017_1_1_3_kg<-zero(x$trip_seeds_quant2017_1_1_3_kg) 
x$trip_seeds_quant2017_1_1_4_kg<-zero(x$trip_seeds_quant2017_1_1_4_kg) 

x$trip_seeds_quant2017_1_2_1_kg<-zero(x$trip_seeds_quant2017_1_2_1_kg) 
x$trip_seeds_quant2017_1_3_1_kg<-zero(x$trip_seeds_quant2017_1_3_1_kg) 

x$trip_seeds_quant2017_2_1_1_kg<-zero(x$trip_seeds_quant2017_2_1_1_kg) 
x$trip_seeds_quant2017_2_1_2_kg<-zero(x$trip_seeds_quant2017_2_1_2_kg) 
x$trip_seeds_quant2017_2_1_3_kg<-zero(x$trip_seeds_quant2017_2_1_3_kg) 

x$trip_seeds_quant2017_3_1_1_kg<-zero(x$trip_seeds_quant2017_3_1_1_kg) 
x$trip_seeds_quant2017_3_1_2_kg<-zero(x$trip_seeds_quant2017_3_1_2_kg) 
x$trip_seeds_quant2017_3_1_3_kg<-zero(x$trip_seeds_quant2017_3_1_3_kg) 

x$s1<-ifelse(x$boughtseeds1==1,x$trip_seeds_quant2017_1_1_1_kg+x$trip_seeds_quant2017_1_1_2_kg+x$trip_seeds_quant2017_1_1_3_kg+x$trip_seeds_quant2017_1_1_4_kg+x$trip_seeds_quant2017_1_2_1_kg+x$trip_seeds_quant2017_1_3_1_kg,0)
x$s2<-ifelse(x$boughtseeds2==1,x$trip_seeds_quant2017_2_1_1_kg+x$trip_seeds_quant2017_2_1_2_kg+x$trip_seeds_quant2017_2_1_3_kg,0)
x$s3<-ifelse(x$boughtseeds3==1,x$trip_seeds_quant2017_3_1_1_kg+x$trip_seeds_quant2017_3_1_2_kg+x$trip_seeds_quant2017_3_1_3_kg,0)

# Since the model only assumes one trip to an agrovet, we now determine on which trip the maximum amount of fertilizer was purchased.  

x$max_fert<-NA
x$trip_village_fert<-NA
x$trip_ward_fert<-NA
x$trip_district_fert<-NA

x$max_seeds<-NA
x$trip_village_seeds<-NA
x$trip_ward_seeds<-NA
x$trip_district_seeds<-NA

x$joint<-0

#cycle through and find the trip tha purchases the maximum amount of fertilizer

for(i in 1:nrow(x)){

	ss<-c(x$s1[i],x$s2[i],x$s3[i])	
	qs<-c(x$q1[i],x$q2[i],x$q3[i],x$q4[i])
	vs<-c(x$vil1[i],x$vil2[i],x$vil3[i],x$vil4[i])
	ws<-c(x$ward1[i],x$ward2[i],x$ward3[i],x$ward4[i])
	ds<-c(x$district1[i],x$district2[i],x$district3[i],x$district4[i])
	
	svs<-c(x$svil1[i],x$svil2[i],x$svil3[i])
	sws<-c(x$sward1[i],x$sward2[i],x$sward3[i])
	sds<-c(x$sdistrict1[i],x$sdistrict2[i],x$sdistrict3[i])
			
	highest_fert<-which.max(qs)
	x$max_fert[i]<-qs[highest_fert]	
	x$trip_village_fert[i]<-vs[highest_fert]
	x$trip_ward_fert[i]<-ws[highest_fert]
	x$trip_district_fert[i]<-ds[highest_fert]
	
	highest_seed<-which.max(ss)
	x$max_seeds[i]<-ss[highest_seed]
	x$trip_village_seeds[i]<-svs[highest_seed]
	x$trip_ward_seeds[i]<-sws[highest_seed]
	x$trip_district_seeds[i]<-sds[highest_seed]	
	
	x$joint[i]<-ifelse(highest_fert==highest_seed&qs[highest_fert]>0&ss[highest_seed],1,0)
		
}
  
diffvil<-nrow(subset(x,trip_village_fert!=trip_village_seeds&trip_village_fert!=""&trip_village_seeds!=""))
samevil<-nrow(subset(x,trip_village_fert==trip_village_seeds&trip_village_fert!=""&trip_village_seeds!=""))


### Percent buying fertilizer and seeds from the same village
samevil/(samevil+diffvil)
 
####  Drop observations where a different location is chosen for fertilizer and seeds - figure out what to do with them later
nrow(x)
#x<-subset(x,!(trip_village_fert!=trip_village_seeds&trip_village_fert!=""&trip_village_seeds!=""))
#x<-subset(x)
 nrow(x)
 
 ###Save for combination with Manyara
 
 write.csv(x,"Data/Temp/Manyara Farmer_mktaccess_trips_all_response.csv",row.names=FALSE)
 

######  Kilimanjaro 
 
######  Agrovet Trade Costs
 
x<-read.dta13("Data/Kilimanjaro Farmer_mktaccess.dta")

x<-subset(x,fert_use2017=="1. Yes")
#x<-subset(x,fert_use2017=="1. Yes"|hybridseed_use2017=="1. Yes")

##syntax here is trip_fert_type2017name_[trip number]_[agrovet number]_[n-th choice of fertilizer type]
##So, as an example, x$trip_fert_type2017name_1_2_1 is the type of fertilizer purchased during the first trip, at the second agrovet visited.  There is no x$trip_fert_type2017name_1_2_2 because there were no first trips in manyara where at the second agrovet visited a second type was purchased.
#In these next few lines, we determine whether fertilizer and seeds were purchased on each trip.

x$boughtfert1<-ifelse((x$trip_fert_type2017name_1_1_1!=""|x$trip_fert_type2017name_1_1_2!=""|x$trip_fert_type2017name_1_1_3!=""|x$trip_fert_type2017name_1_2_1!=""|x$trip_fert_type2017name_1_2_2!=""),1,0)
x$boughtfert2<-ifelse((x$trip_fert_type2017name_2_1_1!=""|x$trip_fert_type2017name_2_1_2!=""),1,0)
x$boughtfert3<-ifelse((x$trip_fert_type2017name_3_1_1!=""|x$trip_fert_type2017name_3_1_2!=""),1,0)
x$boughtfert4<-ifelse((x$trip_fert_type2017name_4_1_1!=""|x$trip_fert_type2017name_4_1_2!=""),1,0)
x$boughtfert5<-ifelse((x$trip_fert_type2017name_5_1_1!=""|x$trip_fert_type2017name_5_1_2!=""),1,0)
x$boughtfert8<-ifelse((x$trip_fert_type2017name_8_1_1!=""),1,0)

x$boughtseeds1<-ifelse((x$trip_seeds_2017name_1_1_1!=""|x$trip_seeds_2017name_1_1_2!=""|x$trip_seeds_2017name_1_2_1!=""|x$trip_seeds_2017name_1_2_2!=""),1,0)
x$boughtseeds2<-ifelse((x$trip_seeds_2017name_2_1_1!=""|x$trip_seeds_2017name_2_1_2!=""),1,0)
x$boughtseeds3<-ifelse((x$trip_seeds_2017name_3_1_1!=""|x$trip_seeds_2017name_3_1_2!=""),1,0)


#### DO I NEED THESE LINES?
#subx1<-subset(x,fert_use2017=="1. Yes")[,c("trip_buy_village2017_1_1","trip_buy_village2017_2_1","trip_buy_village2017_3_1","trip_buy_village2017_4_1","trip_buy_village2017_5_1","trip_buy_village2017_8_1")]
#
#subx2<-subset(x,fert_use2017=="1. Yes")[,c("boughtfert1","boughtfert2","boughtfert3","boughtfert4","boughtfert5","boughtfert8")]
#
#subx1$fert<-ifelse(rowSums(subx2)>0,1,0)
#
#subx1$fertvil<-ifelse(paste(subx1$trip_buy_village2017_1_1,subx1$trip_buy_village2017_2_1,subx1$trip_buy_village2017_3_1,subx1$trip_buy_village2017_4_1,subx1$trip_buy_village2017_5_1,subx1$trip_buy_village2017_8_1,sep="")!="",1,0)

#Identify villages at which fertilizer and seeds were purchased at (similar syntax to the Manyara section earlier)

x$vil1<-ifelse(x$boughtfert1==1|x$boughtseeds1==1,x$trip_buy_village2017_1_1,"")
x$vil2<-ifelse(x$boughtfert2==1|x$boughtseeds2==1,x$trip_buy_village2017_2_1,"")
x$vil3<-ifelse(x$boughtfert3==1|x$boughtseeds3==1,x$trip_buy_village2017_3_1,"")
x$vil4<-ifelse(x$boughtfert4==1,x$trip_buy_village2017_4_1,"")
x$vil5<-ifelse(x$boughtfert5==1,x$trip_buy_village2017_5_1,"")
x$vil8<-ifelse(x$boughtfert8==1,x$trip_buy_village2017_8_1,"")

x$ward1<-ifelse(x$boughtfert1==1|x$boughtseeds1==1,x$trip_buy_ward2017_1_1,"")
x$ward2<-ifelse(x$boughtfert2==1|x$boughtseeds2==1,x$trip_buy_ward2017_2_1,"")
x$ward3<-ifelse(x$boughtfert3==1|x$boughtseeds3==1,x$trip_buy_ward2017_3_1,"")
x$ward4<-ifelse(x$boughtfert4==1,x$trip_buy_ward2017_4_1,"")
x$ward5<-ifelse(x$boughtfert5==1,x$trip_buy_ward2017_5_1,"")
x$ward8<-ifelse(x$boughtfert8==1,x$trip_buy_ward2017_8_1,"")

x$district1<-ifelse(x$boughtfert1==1|x$boughtseeds1==1,x$trip_buy_district2017_1_1,"")
x$district2<-ifelse(x$boughtfert2==1|x$boughtseeds2==1,x$trip_buy_district2017_2_1,"")
x$district3<-ifelse(x$boughtfert3==1|x$boughtseeds3==1,x$trip_buy_district2017_3_1,"")
x$district4<-ifelse(x$boughtfert4==1,x$trip_buy_district2017_4_1,"")
x$district5<-ifelse(x$boughtfert5==1,x$trip_buy_district2017_5_1,"")
x$district8<-ifelse(x$boughtfert8==1,x$trip_buy_district2017_8_1,"")


x$svil1<-ifelse(x$boughtseeds1==1,x$trip_buy_village2017_1_1,"")
x$svil2<-ifelse(x$boughtseeds2==1,x$trip_buy_village2017_2_1,"")
x$svil3<-ifelse(x$boughtseeds3==1,x$trip_buy_village2017_3_1,"")

x$sward1<-ifelse(x$boughtseeds1==1,x$trip_buy_ward2017_1_1,"")
x$sward2<-ifelse(x$boughtseeds2==1,x$trip_buy_ward2017_2_1,"")
x$sward3<-ifelse(x$boughtseeds3==1,x$trip_buy_ward2017_3_1,"")

x$sdistrict1<-ifelse(x$boughtseeds1==1,x$trip_buy_district2017_1_1,"")
x$sdistrict2<-ifelse(x$boughtseeds2==1,x$trip_buy_district2017_2_1,"")
x$sdistrict3<-ifelse(x$boughtseeds3==1,x$trip_buy_district2017_3_1,"")


zero<-function(f){return(ifelse(is.na(f),0,f))}

#trip_fert_quant2017_[trip number]_[agrovet number]_[n-th choice of fertilizer type]_[n-th choice of fertilizer brand]_kg is the KG's purchased by trip, agrovet number within trip, fertilizer type, and brand.  The function zero() makes these all numeric values

x$trip_fert_quant2017_1_1_1_1_kg<-zero(x$trip_fert_quant2017_1_1_1_1_kg)
x$trip_fert_quant2017_1_1_2_1_kg<-zero(x$trip_fert_quant2017_1_1_2_1_kg) 
x$trip_fert_quant2017_1_2_1_1_kg<-zero(x$trip_fert_quant2017_1_2_1_1_kg) 
x$trip_fert_quant2017_1_2_2_1_kg<-zero(x$trip_fert_quant2017_1_2_2_1_kg) 
x$trip_fert_quant2017_2_1_1_1_kg<-zero(x$trip_fert_quant2017_2_1_1_1_kg) 
x$trip_fert_quant2017_2_1_2_1_kg<-zero(x$trip_fert_quant2017_2_1_2_1_kg) 
x$trip_fert_quant2017_3_1_1_1_kg<-zero(x$trip_fert_quant2017_3_1_1_1_kg) 
x$trip_fert_quant2017_3_1_1_2_kg<-zero(x$trip_fert_quant2017_3_1_1_2_kg) 
x$trip_fert_quant2017_3_1_2_1_kg<-zero(x$trip_fert_quant2017_3_1_2_1_kg) 
x$trip_fert_quant2017_4_1_1_1_kg<-zero(x$trip_fert_quant2017_4_1_1_1_kg) 
x$trip_fert_quant2017_4_1_2_1_kg<-zero(x$trip_fert_quant2017_4_1_2_1_kg) 
x$trip_fert_quant2017_5_1_1_1_kg<-zero(x$trip_fert_quant2017_5_1_1_1_kg) 
x$trip_fert_quant2017_5_1_2_1_kg<-zero(x$trip_fert_quant2017_5_1_2_1_kg) 
x$trip_fert_quant2017_8_1_1_1_kg<-zero(x$trip_fert_quant2017_8_1_1_1_kg)

#sum the quantity of fertilizer bought on each trip

x$q1<-ifelse(x$boughtfert1==1,x$trip_fert_quant2017_1_1_1_1_kg+x$trip_fert_quant2017_1_1_2_1_kg+x$trip_fert_quant2017_1_2_1_1_kg+x$trip_fert_quant2017_1_2_2_1_kg,0)
x$q2<-ifelse(x$boughtfert2==1,x$trip_fert_quant2017_2_1_1_1_kg+x$trip_fert_quant2017_2_1_2_1_kg,0)
x$q3<-ifelse(x$boughtfert3==1,x$trip_fert_quant2017_3_1_1_1_kg+x$trip_fert_quant2017_3_1_1_2_kg+x$trip_fert_quant2017_3_1_2_1_kg ,0)
x$q4<-ifelse(x$boughtfert4==1,x$trip_fert_quant2017_4_1_1_1_kg+x$trip_fert_quant2017_4_1_2_1_kg,0)
x$q5<-ifelse(x$boughtfert5==1,x$trip_fert_quant2017_5_1_1_1_kg+x$trip_fert_quant2017_5_1_2_1_kg,0)
x$q8<-ifelse(x$boughtfert8==1,x$trip_fert_quant2017_8_1_1_1_kg,0)

#sum the quantity of seeds bought on each trip

x$trip_seeds_quant2017_1_1_1_kg<-zero(x$trip_seeds_quant2017_1_1_1_kg)
x$trip_seeds_quant2017_1_1_2_kg<-zero(x$trip_seeds_quant2017_1_1_2_kg) 
x$trip_seeds_quant2017_1_2_1_kg<-zero(x$trip_seeds_quant2017_1_2_1_kg) 
x$trip_seeds_quant2017_1_2_2_kg<-zero(x$trip_seeds_quant2017_1_2_2_kg) 
x$trip_seeds_quant2017_2_1_1_kg<-zero(x$trip_seeds_quant2017_2_1_1_kg) 
x$trip_seeds_quant2017_2_1_2_kg<-zero(x$trip_seeds_quant2017_2_1_2_kg) 
x$trip_seeds_quant2017_3_1_1_kg<-zero(x$trip_seeds_quant2017_3_1_1_kg) 
x$trip_seeds_quant2017_3_1_2_kg<-zero(x$trip_seeds_quant2017_3_1_2_kg) 

x$s1<-ifelse(x$boughtseeds1==1,x$trip_seeds_quant2017_1_1_1_kg+x$trip_seeds_quant2017_1_1_2_kg+x$trip_seeds_quant2017_1_2_1_kg+x$trip_seeds_quant2017_1_2_2_kg,0)
x$s2<-ifelse(x$boughtseeds2==1,x$trip_seeds_quant2017_2_1_1_kg+x$trip_seeds_quant2017_2_1_2_kg,0)
x$s3<-ifelse(x$boughtseeds3==1,x$trip_seeds_quant2017_3_1_1_kg+x$trip_seeds_quant2017_3_1_2_kg ,0)

x$max_fert<-NA
x$trip_village_fert<-NA
x$trip_ward_fert<-NA
x$trip_district_fert<-NA

x$max_seeds<-NA
x$trip_village_seeds<-NA
x$trip_ward_seeds<-NA
x$trip_district_seeds<-NA

x$joint<-0

#find trip where maximum fertilizer was purchased

for(i in 1:nrow(x)){

	ss<-c(x$s1[i],x$s2[i],x$s3[i])	
	qs<-c(x$q1[i],x$q2[i],x$q3[i],x$q4[i],x$q5[i],x$q8[i])
	vs<-c(x$vil1[i],x$vil2[i],x$vil3[i],x$vil4[i],x$vil5[i],x$vil8[i])
	ws<-c(x$ward1[i],x$ward2[i],x$ward3[i],x$ward4[i],x$ward5[i],x$ward8[i])
	ds<-c(x$district1[i],x$district2[i],x$district3[i],x$district4[i],x$district5[i],x$district8[i])
	
	svs<-c(x$svil1[i],x$svil2[i],x$svil3[i])
	sws<-c(x$sward1[i],x$sward2[i],x$sward3[i])
	sds<-c(x$sdistrict1[i],x$sdistrict2[i],x$sdistrict3[i])
			
	highest_fert<-which.max(qs)
	x$max_fert[i]<-qs[highest_fert]	
	x$trip_village_fert[i]<-vs[highest_fert]
	x$trip_ward_fert[i]<-ws[highest_fert]
	x$trip_district_fert[i]<-ds[highest_fert]
	
	highest_seed<-which.max(ss)
	x$max_seeds[i]<-ss[highest_seed]
	x$trip_village_seeds[i]<-svs[highest_seed]
	x$trip_ward_seeds[i]<-sws[highest_seed]
	x$trip_district_seeds[i]<-sds[highest_seed]	
	
	x$joint[i]<-ifelse(highest_fert==highest_seed&qs[highest_fert]>0&ss[highest_seed],1,0)
		
}
  
diffvil<-nrow(subset(x,trip_village_fert!=trip_village_seeds&trip_village_fert!=""&trip_village_seeds!=""))
samevil<-nrow(subset(x,trip_village_fert==trip_village_seeds&trip_village_fert!=""&trip_village_seeds!=""))


### Percent buying fertilizer and seeds from the same village
samevil/(samevil+diffvil)
 
 
 ###Save for combination with Manyara
 
 write.csv(x,"Data/Temp/Kilimanjaro Farmer_mktaccess_trips_all_response.csv",row.names=FALSE)
 
 
#Now we combine the files to create the dataset used in Stata with the McFadden estimator.   

library(readstata13)
 
xK<-read.csv("Data/Temp/Kilimanjaro Farmer_mktaccess_trips_all_response.csv",header=TRUE)
	prexK<-xK[,c("farmerid","village_name","ward","market","district","fert_use2017","hybridseed_use2017","joint","trip_village_fert","trip_village_seeds","trip_ward_fert","trip_ward_seeds","trip_district_fert","trip_district_seeds","farmer_total_income_USD_w5")]
	prexK$region<-"KILIMANJARO"

xM<-read.csv("Data/Temp/Manyara Farmer_mktaccess_trips_all_response.csv",header=TRUE)
	prexM<-xM[,c("farmerid","survey_village","survey_ward","market","survey_district","fert_use2017","hybridseed_use2017","joint","trip_village_fert","trip_village_seeds","trip_ward_fert","trip_ward_seeds","trip_district_fert","trip_district_seeds","farmer_total_income_USD_w5")]
	names(prexM)[2:5]<-c("village_name","ward","market","district")
	prexM$region<-"MANYARA"

nrow(prexK)	 
length(unique(prexK$farmerid))
nrow(prexM)	 
length(unique(prexM$farmerid))

prexM$farmerid<-(-prexM$farmerid)  #put a negative sign in front of farmerids for Manyara to make sure there are no duplicate numbers with Kilimanjaro

x<-rbind(prexK,prexM)   #merge Kili and Manyara

x$farmer_total_income<-x$farmer_total_income_USD_w5

nrow(x)	 
length(unique(x$farmerid))

#### Combine into bundle X location panel
 
x$home_pair<-paste(x$village_name,x$ward)  #define the home village

x$trip_village_fert<-as.character(x$trip_village_fert)
x$trip_village_seeds<-as.character(x$trip_village_seeds)

x$trip_ward_fert<-as.character(x$trip_ward_fert)
x$trip_ward_seeds<-as.character(x$trip_ward_seeds)

x$trip_district_fert<-as.character(x$trip_district_fert)
x$trip_district_seeds<-as.character(x$trip_district_seeds)

x$trip_village<-ifelse(x$trip_village_fert!="",x$trip_village_fert,"")  #use the village, ward, district combo for fertilizer, and if not reported, seeds
x$trip_ward<-ifelse(x$trip_ward_fert!="",x$trip_ward_fert,"")
x$trip_district<-ifelse(x$trip_district_fert!="",x$trip_district_fert,"")

x$trip_village<-ifelse(x$trip_village_fert!="",x$trip_village_fert,x$trip_village_seeds)  #use the village, ward, district combo for fertilizer, and if not reported, seeds
x$trip_ward<-ifelse(x$trip_ward_fert!="",x$trip_ward_fert,x$trip_ward_seeds)
x$trip_district<-ifelse(x$trip_district_fert!="",x$trip_district_fert,x$trip_district_seeds)

x$trip_district<-ifelse(x$trip_district=="Simanjiro","SIMANJIRO",x$trip_district)
x$trip_district<-ifelse(x$trip_district=="BABATI_MJINI","BABATI MJINI",x$trip_district)

#### Remove locations for which we do not have gps coordinates
nrow(x)
x<-subset(x,trip_district!="ARUSHA MJINI")
nrow(x)
x<-subset(x,trip_district!="HALMASHAURI YA_KOROGWE")
nrow(x)
x<-subset(x,trip_district!="KONDOA")
nrow(x)
x<-subset(x,trip_district!="MERU")
nrow(x)
x<-subset(x,trip_district!="MONDULI")
nrow(x)

x1<-subset(x,trip_village==""&trip_ward==""&trip_district=="")
x2<-subset(x,trip_village!=""&trip_ward!=""&trip_district!="")
x<-rbind(x1,x2)
nrow(x)

x<-subset(x,fert_use2017=="1. Yes")  #only observations where fertilizer was purchased (so location of seeds from above is irrelevant)

x$choice<-"fert"

nrow(x)
x<-subset(x,is.na(choice)==FALSE)
nrow(x)

x$trip_pair<-paste(x$trip_village,x$trip_ward)  #Define destination pair

length(sort(unique(paste(x$trip_village,x$trip_ward))))
length(sort(unique(paste(x$trip_village,x$trip_ward,x$trip_district,x$region))))

trips<-sort(unique(paste(x$trip_village,x$trip_ward)))  #define set of all villages listed as a trip in our data

#define new variables for each trip destination
trip_pair<-as.character(tapply(x$trip_pair,x$trip_pair,FUN=unique))
ag_village<-as.character(tapply(x$trip_village,x$trip_pair,FUN=unique))
ag_ward<-as.character(tapply(x$trip_ward,x$trip_pair,FUN=unique))
ag_district<-as.character(tapply(x$trip_district,x$trip_pair,FUN=unique))
ag_trip_pair<-as.character(tapply(x$trip_pair,x$trip_pair,FUN=unique))

###  Define size of balanced matrix
N<-nrow(x)*length(trips)

d<-matrix(nrow=N,ncol=15)
d<-as.data.frame(d)
names(d)<-c("id","income","village","ward","market","district","region","trip_pair","ag_village","ag_ward","ag_district","ag_trip_pair","ag_bundle","choice","select")

prex<-x
prex$fert_use2017<-as.character(prex$fert_use2017)
prex$hybridseed_use2017<-as.character(prex$hybridseed_use2017)
prex$hybridseed_use2017<-ifelse(prex$fert_use2017=="1. Yes","1. Yes",prex$hybridseed_use2017)
prex$hybridseed_use2017<-ifelse(is.na(prex$hybridseed_use2017),"0. No",prex$hybridseed_use2017)
prex$fert_use2017<-ifelse(is.na(prex$fert_use2017),"0. No",prex$fert_use2017)

sbase<-matrix(nrow=length(trips),ncol=15)  #This is the matrix that will be populated for each individual, with a 1 in the trip destination that corresponds to that individual, and zero for all other trip destinations
sbase<-as.data.frame(sbase)
names(sbase)<-c("id","income","village","ward","market","district","region","trip_pair","ag_village","ag_ward","ag_district","ag_trip_pair","ag_bundle","choice","select")

sbase$ag_village<-ag_village
sbase$ag_ward<-ag_ward
sbase$ag_district<-ag_district
sbase$ag_trip_pair<-ag_trip_pair
sbase$ag_bundle[1:length(trips)]<-"fert"

#populate the  matrix d

for(i in 1:nrow(prex)){
	
	sx<-prex[i,]
	
		sd<-sbase
	
		sd$id<-as.numeric(sx$farmerid)
		sd$income<-as.numeric(sx$farmer_total_income)
		sd$village<-as.character(sx$village_name)
		sd$ward<-as.character(sx$ward)
		sd$market<-as.character(sx$market)
		sd$district<-as.character(sx$district)	
		sd$region<-as.character(sx$region)	
		sd$trip_pair<-as.character(sx$trip_pair)	
		
		sd$choice<-sx$choice

		sd$select<-ifelse(sd$trip_pair==sd$ag_trip_pair&sd$choice==sd$ag_bundle,1,0)	
	
		count0<-(i-1)*length(trips)*1+1
		count1<-(i)*length(trips)*1
		
		d[count0:count1,]<-sd
		
		print(i)
}

names(d)[8]<-"choice_pair"

#d$ag_village<-ifelse(d$ag_trip_pair=="NoAdopt NoAdopt","NoAdopt",d$ag_village)
#d$ag_ward<-ifelse(d$ag_trip_pair=="NoAdopt NoAdopt","NoAdopt",d$ag_ward)
#d$ag_district<-ifelse(d$ag_trip_pair=="NoAdopt NoAdopt","NoAdopt",d$ag_district)

d$home_pair<-paste(d$village,d$ward,d$district)
d$trip_pair<-paste(d$ag_village,d$ag_ward,d$ag_district)

nrow(d)

#check to make sure each farmer only takes one trip in the dataset
  d$check<-as.numeric(ave(d$select,d$id,FUN=function(f){sum(f,na.rm=TRUE)}))
  nrow(subset(d,check==0))
  nrow(subset(d,check==1))
  nrow(subset(d,check==2))

#Merge distance data

  z<-read.dta13("Data/PooledRegions ViltoVil_KMCOST_share.dta")
	
	z$same<-ifelse(z$survey_region_O==z$survey_region_D&z$survey_district_O==z$survey_district_D&z$survey_ward_O==z$survey_ward_D&z$survey_village_O==z$survey_village_D,1,0)

	z$DIST_km_direct_rural<-z$DIST_km_direct*z$rural_km_share	
	z$DIST_km_direct_main<-z$DIST_km_direct*(1-z$rural_km_share)

  z$pair_dest<-paste(as.character(z$survey_village_D),as.character(z$survey_ward_D),as.character(z$survey_district_D))
  z$pair_origin<-paste(as.character(z$survey_village_O),as.character(z$survey_ward_O),as.character(z$survey_district_O))
  
  nrow(z)



sz<-z[,c("pair_dest","pair_origin","primary_market_O","primary_market_D","DIST_km","DIST_cost_USD","DIST_km_direct","DIST_km_direct_rural","DIST_km_direct_main","DIST_cost_USD_direct","arcdist_vil_vil_km")]
names(sz)<-c("trip_pair","home_pair","primary_market_O","primary_market_D","DIST_km","DIST_cost_USD","DIST_km_direct","DIST_km_direct_rural","DIST_km_direct_main","DIST_cost_USD_direct","arcdist_vil_vil_km")

#sz<-z[,c("pair_dest","pair_origin","primary_market_O","primary_market_D","DIST_km","DIST_cost_USD","DIST_km_direct","DIST_km_direct_rural","DIST_km_direct_main","DIST_cost_USD_direct")]
#names(sz)<-c("trip_pair","home_pair","primary_market_O","primary_market_D","DIST_km","DIST_cost_USD","DIST_km_direct","DIST_km_direct_rural","DIST_km_direct_main","DIST_cost_USD_direct")

d$trip_pair<-gsub("_","",d$trip_pair)
d$home_pair<-gsub("_","",d$home_pair)

sz$trip_pair<-gsub("_","",sz$trip_pair)
sz$home_pair<-gsub("_","",sz$home_pair)

length(setdiff(unique(sz$trip_pair),unique(d$trip_pair)))
length(unique(sz$trip_pair))
length(unique(d$trip_pair))


#merge distance with trip data
nrow(d)
d<-merge(d,sz,all.x=TRUE)
nrow(d)

d$check<-as.numeric(ave(d$select,d$id,FUN=function(f){sum(f,na.rm=TRUE)}))
nrow(subset(d,check==0))
nrow(subset(d,check==1))
nrow(subset(d,check==2))

## make sure that distances to your origin village is zero
d$arcdist_vil_vil_km<-ifelse(d$home_pair==d$trip_pair&is.na(d$arcdist_vil_vil_km)==TRUE,0,d$arcdist_vil_vil_km)
d$DIST_km<-ifelse(d$home_pair==d$trip_pair&is.na(d$DIST_km)==TRUE,0,d$DIST_km)
d$DIST_cost_USD<-ifelse(d$home_pair==d$trip_pair&is.na(d$DIST_cost_USD)==TRUE,0,d$DIST_cost_USD)
d$DIST_km_direct<-ifelse(d$home_pair==d$trip_pair&is.na(d$DIST_km_direct)==TRUE,0,d$DIST_km_direct)
d$DIST_cost_USD_direct<-ifelse(d$home_pair==d$trip_pair&is.na(d$DIST_cost_USD_direct)==TRUE,0,d$DIST_cost_USD_direct)
d$DIST_km_direct<-ifelse(d$home_pair==d$trip_pair&is.na(d$DIST_km_direct)==TRUE,0,d$DIST_km_direct)
d$DIST_km_direct_rural<-ifelse(d$home_pair==d$trip_pair&is.na(d$DIST_km_direct_rural)==TRUE,0,d$DIST_km_direct_rural)
d$DIST_km_direct_main<-ifelse(d$home_pair==d$trip_pair&is.na(d$DIST_km_direct_main)==TRUE,0,d$DIST_km_direct_main)
#d$google_vil_mkt_km_O<-ifelse(d$home_pair==d$trip_pair&is.na(d$google_vil_mkt_km_O)==TRUE,0,d$google_vil_mkt_km_O)
#d$google_vil_mkt_km_D<-ifelse(d$home_pair==d$trip_pair&is.na(d$google_vil_mkt_km_D)==TRUE,0,d$google_vil_mkt_km_D)

newd<-d
#newd$district_adopt<-paste(newd$district,newd$agadopt)
#newd$market_adopt<-paste(newd$market,newd$agadopt)


write.csv(newd,"Data/buying_inputs_MNL_replication.csv",na=".",row.names=FALSE)



